import sys
sys.path.append('..')
import numpy as np
from common.util import preprocess

# Sample sentence that the script below tokenizes and encodes as word IDs.
text = 'You say goodbye and I say hello.'

def preprocess(text):
    """Tokenize ``text`` and encode it as an array of integer word IDs.

    The text is lower-cased, every ``.`` is split off as its own token, and
    the result is split on whitespace. IDs are assigned in order of first
    appearance, starting at 0.

    Leading/trailing whitespace and runs of whitespace (spaces, tabs,
    newlines) are ignored, so no empty-string token ever enters the
    vocabulary, and an empty input yields an empty corpus.

    NOTE: this definition shadows the ``preprocess`` imported from
    ``common.util`` at the top of this file.

    Args:
        text: The input string to tokenize.

    Returns:
        A tuple ``(corpus, word_to_id, id_to_word)`` where ``corpus`` is a
        1-D integer NumPy array holding the ID of each token in order,
        ``word_to_id`` maps each distinct token to its ID, and
        ``id_to_word`` is the inverse mapping.
    """
    # Pad periods with a space so they become standalone tokens, then split
    # on any whitespace (no-argument split drops empty strings).
    words = text.lower().replace('.', ' .').split()

    word_to_id = {}
    id_to_word = {}
    for word in words:
        if word not in word_to_id:
            new_id = len(word_to_id)
            word_to_id[word] = new_id
            id_to_word[new_id] = word

    # Explicit integer dtype keeps an empty corpus from defaulting to float.
    corpus = np.array([word_to_id[word] for word in words], dtype=int)
    return corpus, word_to_id, id_to_word

# Encode the sample sentence and keep both vocabulary mappings.
corpus, word_to_id, id_to_word = preprocess(text)

# Dump both lookup tables in full.
for _label, _table in (('word_to_id: ', word_to_id),
                       ('id_to_word: ', id_to_word)):
    print(_label, _table)

# Spot-check one lookup in each direction, then show the encoded sentence.
print('id_to_word[0]: ', id_to_word[0])
print("word_to_id['hello']: ", word_to_id['hello'])
print('corpus: ', corpus)
# text = 'You say goodbye and I say hello.'
# corpus, word_to_id, id_to_word = preprocess(text)
# print('corpus: \n', corpus)
# print('word_to_id: \n', word_to_id)
# print('id_to_word: \n', id_to_word)